The following packages are required to run the code in this script.
#devtools::install_github("EIvimeyCook/ShinyDigitise")
#library(shinyDigitise)
pacman::p_load(tidyr, # For tidying data, including reshaping and separating data into columns.
dplyr, # For data manipulation, including filtering, selecting, and summarizing data.
stringr, # For string manipulation, including pattern matching and text extraction.
readr,
here, # For constructing file paths relative to the root of the project directory.
lmtest, # For conducting diagnostic tests and hypothesis tests on linear models.
dm, # For working with data models and relational data within R.
data.table, # For fast and efficient data manipulation, particularly with large datasets.
metaDigitise,# For extracting and digitizing data from plots in scientific literature.
clubSandwich,# For computing robust standard errors in meta-analysis, particularly in clustered data.
metafor, # For conducting meta-analyses, including univariate and multivariate models.
orchaRd, # For visualizing meta-analysis results, including bubble plots and forest plots.
ggplot2, # For creating complex and customizable visualizations using the grammar of graphics.
MuMIn, # For model selection and model averaging, particularly in regression models.
kableExtra, # Enhances the formatting capabilities of tables
patchwork, # Facilitates combining multiple ggplot2 plots into a single layout
ggalluvial, # Extends ggplot2 to create alluvial diagrams
ggbreak)
We need to perform data wrangling to optimally structure the data for analysis. The following code chunk outlines the steps taken, along with brief descriptions of each.
study_data <- read.csv(here("data/study_data.csv"))
fw_data <- read.csv(here("data/fw_data.csv"))
PFAS_data <- read.csv(here("data/PFAS_data.csv"))
TMF_data <- read.csv(here("data/TMF_data.csv"))
# appr_data <- read.csv(here("data/appr_data.csv"))
TMF_data <- TMF_data %>%
mutate(across(everything(), # Replace "na" strings with actual NA values
~ ifelse(. == "na", NA, .)))
function1 <- function(data) {
# Convert necessary columns to numeric, handling any potential issues
data <- data %>%
mutate(
TMF_lower_95CI = as.numeric(TMF_lower_95CI),
TMF_upper_95CI = as.numeric(TMF_upper_95CI),
TMF_se = as.numeric(TMF_se),
TMS_lower_95CI = as.numeric(TMS_lower_95CI),
TMS_upper_95CI = as.numeric(TMS_upper_95CI),
TMS_se = as.numeric(TMS_se),
TMS = as.numeric(TMS),
TMF = as.numeric(TMF),
pvalue_adj = as.numeric(pvalue_adj),
TMS_calculated = as.numeric(TMS_calculated),
TMS_se_calculated = as.numeric(TMS_se_calculated)
) %>%
mutate(
# Calculate TMF_se if missing and TMF 95% CI is available
TMF_se = ifelse(is.na(TMF_se) & !is.na(TMF_lower_95CI) & !is.na(TMF_upper_95CI),
(TMF_upper_95CI - TMF_lower_95CI) / 1.96, TMF_se),
# Calculate TMS_se if missing and TMS 95% CI is available
TMS_se = ifelse(is.na(TMS_se) & !is.na(TMS_lower_95CI) & !is.na(TMS_upper_95CI),
(TMS_upper_95CI - TMS_lower_95CI) / 1.96, TMS_se),
# Calculate TMF if missing and TMS is available
TMF = ifelse(is.na(TMF) & !is.na(TMS),
ifelse(TMF_formula == "10^slope", 10^TMS, exp(TMS)), TMF),
# Calculate z_value if TMS_se is missing and pvalue_adj is available
z_value = ifelse(is.na(TMS_se) & !is.na(pvalue_adj),
qnorm(1 - pvalue_adj / 2), NA),
# Calculate TMS if missing and TMF is available
TMS = ifelse(is.na(TMS) & !is.na(TMF),
ifelse(TMF_formula == "10^slope", log10(TMF), log(TMF)), TMS),
# Calculate TMS_se if missing, TMS, and z_value are available
TMS_se = ifelse(is.na(TMS_se) & !is.na(TMS) & !is.na(z_value),
TMS / z_value, TMS_se),
# Calculate TMS_se if TMS is missing, but TMF and z_value are available
TMS_se = ifelse(is.na(TMS) & !is.na(TMF) & !is.na(z_value),
ifelse(TMF_formula == "10^slope", log10(TMF) / z_value, log(TMF) / z_value), TMS_se),
# Calculate TMF_se if missing and TMS_se is available
TMF_se = ifelse(is.na(TMF_se) & !is.na(TMS_se),
ifelse(TMF_formula == "10^slope", 10^TMS_se, exp(TMS_se)), TMF_se),
# Calculate TMF if missing and TMS_calculated is available
TMF = ifelse(is.na(TMF) & !is.na(TMS_calculated),
ifelse(TMF_formula == "10^slope", 10^TMS_calculated, exp(TMS_calculated)), TMF),
# Calculate TMF_se if missing and TMS_se_calculated is available
TMF_se = ifelse(is.na(TMF_se) & !is.na(TMS_se_calculated),
ifelse(TMF_formula == "10^slope", 10^TMS_se_calculated, exp(TMS_se_calculated)), TMF_se)
)
# Return the modified data
return(data)
}
# Apply the function
TMF_data <- function1(TMF_data)
# Count the number of rows with non-NA values in both TMF and TMF_se
TMF_data %>%
filter(!is.na(TMF) & !is.na(TMF_se)) %>%
nrow()
# [1] 981
# Count the number of rows with non-NA values in TMF
TMF_data %>%
filter(!is.na(TMF)) %>%
nrow()
# [1] 1018
TMF_data <- TMF_data %>%
filter(!is.na(TMF) | !is.na(TMF_se)) %>%
mutate(
ln_TMF = log(TMF),
ln_TMF_se = TMF_se / TMF, # Taylor approximation
Sample_type = factor(Sample_type), # Convert to factor
Sample_type = relevel(Sample_type, ref = "whole-organisms only") # Relevel
)
TMF_data <- TMF_data %>%
select(-(starts_with("Initial"))) %>% #removing any column starting with initials
select(-(starts_with("X"))) %>% #removing any column starting with X
select(-(ends_with("comment"))) %>% #removing any column ending with comment
select(-("Study_ID")) #removing Study_ID column because redundant
PFAS_data <- PFAS_data %>%
select(-(starts_with("Initial"))) %>% #removing any column starting with initials
select(-(starts_with("X"))) %>% #removing any column starting with X
select(-(ends_with("comment"))) #removing any column ending with comment
fw_data <- fw_data %>%
select(-(starts_with("Initial"))) %>% #removing any column starting with initials
select(-(starts_with("X"))) %>% #removing any column starting with X
select(-(ends_with("comment"))) #removing any column ending with comment
study_data <- study_data %>%
select(-(starts_with("Initials"))) %>% #removing any column starting with initials
select(-(starts_with("X"))) %>% #removing any column starting with X
select(-(ends_with("comment"))) %>% #removing any column ending with comment
filter(!is.na(Study_ID) & Study_ID != "") %>% #removing rows where Study_ID is empty (either NA or "")
select(-("Undetected_strategy_LOD")) %>% #removing Undetected_strategy_LOD column because redundant
select(-("Undetected_strategy_LOQ")) %>% #removing Undetected_strategy_LOQ column because redundant
mutate(Regression_variable = case_when(
grepl("TL", Linear_regression_formula) ~ "TL",
grepl("δ15N", Linear_regression_formula) ~ "δ15N",
TRUE ~ NA_character_
)) %>%
mutate(year_publication = as.numeric(str_extract(Author_year, "(?<=_)\\d{4}")))
TMF_data <- TMF_data %>%
# Correct mislabelings in the PFAS_ID column (TMF_data data table)
mutate(PFAS_ID = case_when(
PFAS_ID == "PFDoDa" ~ "PFDoDA",
PFAS_ID == "br-PFDoDa" ~ "br-PFDoDA",
TRUE ~ PFAS_ID
)) %>%
# Filter the data to remove rows where Sample_type is NA or an empty string
filter(!is.na(Sample_type) & Sample_type != "") %>%
# Recode values in the Undetected_strategy_LOD column
mutate(Undetected_strategy_LOD = recode(
Undetected_strategy_LOD,
"Zero" = "zero",
"The limit value divided by two" = "the limit value divided by two",
"The limit value divided by the square root of two" = "the limit value divided by the square root of two"
)) %>%
# Recode values in the Undetected_strategy_LOQ column
mutate(Undetected_strategy_LOQ = recode(
Undetected_strategy_LOQ,
"The limit value divided by two" = "the limit value divided by two ",
"used without alteration" = "the limit value"
))
PFAS_data <- PFAS_data %>%
# Correct mislabeling in the PFAS_ID column (PFAS_data data table)
mutate(PFAS_ID = case_when(
PFAS_ID == "cis-PFECHS " ~ "cis-PFECHS",
TRUE ~ PFAS_ID
))
fw_data <- fw_data %>%
# Trim whitespace
mutate(Latitude_DD = str_trim(Latitude_DD)) %>%
# Remove the ° symbol and any non-numeric characters
mutate(Latitude_DD = str_replace_all(Latitude_DD, "[^0-9.-]", "")) %>%
# Convert to numeric
mutate(Latitude_DD = as.numeric(Latitude_DD))
fw_data <- fw_data %>%
mutate(Breathing_type_fw = case_when(
Species_composition %in% c(
"fish only"
) ~ "water-breathing only",
Species_composition %in% c(
"fish and other species", "no fish"
) ~ "not water-breathing only",
TRUE ~ NA_character_ # Assign NA for any species not in the lists
)) %>%
mutate(
Ecosystem = factor(Ecosystem),
Ecosystem = relevel(Ecosystem, ref = "marine")) # Relevel
fw_data <- fw_data %>%
mutate(Breathing_type = case_when(
Species_highest_TL %in% c(
"Delphinapterus leucas", "Delphinus truncatus", "Larus hyperboreus",
"Channa asiatica", "Argyrosomus regius", "Larus crassirostris",
"Neophocaena", "Pusa hispida", "Sousa chinensis",
"Catharacta maccormicki", "Buteo hemilasius", "Canis lupus",
"Accipiter nisus", "Accipiter cooperii"
) ~ "Air breathing",
Species_highest_TL %in% c(
"Boreogadus saida", "Barbus barbus", "Squalius cephalus",
"Cololabis saira", "Salvelinus alpinus", "Salvelinus namaycush",
"Micropterus salmoides", "Notothenia coriiceps", "Esox lucius",
"Pagrosomus major", "Lateolabrax japonicas", "Thryssa mysiax",
"Trichiurus lepturus", "Hexagrammos otakii", "Rapana venosa",
"Sander lucioperca", "Salvelinus fontinalis", "Morone saxatilis",
"Channa argus", "Micropterus dolomieu", "Johnius belengeri",
"Trematomus eulepidotus", "Miichthys miiuy", "Ophiocephalus argus Cantor",
"Liza carinatus", "Sander vitreus", "Sebastes schlegeli",
"Hemibarbus maculates", "Merlangius merlangus", "Gaidropsarus vulgaris",
"Clarias macrocephalus", "Carassius auratus", "Oreochromis niloticus",
"Phoca vitulina", "Parabramis pekinensis", "Lateolabrax sp.",
"Sarda sarda"
) ~ "Water breathing",
Species_highest_TL %in% c(
"Hydropsychidae", "Polychaeta", "Zygoptera", "Lithobiomorph",
"Araneae"
) ~ "Water breathing through skin", # Many aquatic larvae breathe through skin
TRUE ~ NA_character_ # Assign NA for any species not in the lists
))
# The following function finds the highest and lowest trophic level processing TL_values for each row. Then, we also calculate the length of the food web using these values.
process_TL_extremes <- function(TL_string) {
# Split the string by commas
values <- unlist(strsplit(TL_string, ","))
# Process each value
processed_values <- sapply(values, function(value) {
value <- str_trim(value) # Trim any whitespace
# Skip processing if the value is NA or empty
if (is.na(value) || value == "") {
return(NA)
}
# Check if it's a range (e.g., 3.12-3.64)
if (str_detect(value, "-")) {
range_values <- as.numeric(unlist(strsplit(value, "-")))
return(range_values)
# Check if it's a value with error (e.g., 1.83±0.09)
} else if (str_detect(value, "±")) {
mean_value <- as.numeric(unlist(strsplit(value, "±"))[1])
return(mean_value)
# Otherwise, it's a simple numeric value
} else {
return(as.numeric(value))
}
})
# Unlist the processed values and remove NAs
processed_values <- unlist(processed_values)
processed_values <- processed_values[!is.na(processed_values)]
# Return the highest and lowest values, handle empty processed_values
if (length(processed_values) == 0) {
return(list(highest = NA, lowest = NA))
} else {
return(list(highest = max(processed_values, na.rm = TRUE),
lowest = min(processed_values, na.rm = TRUE)))
}
}
# Apply the function to each row in the TL_values column to get TL_highest and TL_lowest
fw_data <- fw_data %>%
mutate(TL_extremes = lapply(TL_values, process_TL_extremes)) %>%
mutate(TL_highest = sapply(TL_extremes, function(x) x$highest),
TL_lowest = sapply(TL_extremes, function(x) x$lowest)) %>%
select(-TL_extremes) # Remove the temporary TL_extremes column
fw_data <- fw_data %>%
# Create a new column for the difference
mutate(fw_length = TL_highest - TL_lowest)
# The following code creates a data model object called data_dm_no_keys using the dm() function, with the tables st, sp, pfas, co, sa, and me.
data_dm_no_keys <- dm(study_data, PFAS_data, fw_data, TMF_data)
data_dm_no_keys #printing the data_dm_no_keys object, which displays the structure and content of the data model
data_dm_no_keys$study_data #accessing the study_data table from the data model using $ operator and printing it
data_dm_no_keys[c("study_data", "PFAS_data")] #accessing the columns "study_data" and "PFAS_data" from the data model using indexing and printing the corresponding data
# The following code adds primary keys to the data_dm_no_keys data model using the dm_add_pk() function. It specifies the table and the corresponding column(s) to be used as primary keys for each table. The resulting data model with only primary keys is stored in the data_dm_only_pks object and printed. Next, foreign keys are added to the data_dm_only_pks data model using the dm_add_fk() function. It specifies the table, columns, and the referenced table for each foreign key constraint. The resulting data model with both primary and foreign keys is stored in the data_dm_all_keys object and printed.
data_dm_only_pks <- data_dm_no_keys %>%
dm_add_pk(table = study_data, columns = Study_ID) %>%
dm_add_pk(PFAS_data, PFAS_ID) %>%
dm_add_pk(fw_data, FW_ID) %>%
dm_add_pk(TMF_data, TMF_ID)
data_dm_only_pks
data_dm_all_keys <-
data_dm_only_pks %>%
dm_add_fk(table = fw_data, columns = Study_ID, ref_table = study_data) %>%
dm_add_fk(table = TMF_data, columns = FW_ID, ref_table = fw_data) %>%
dm_add_fk(table = TMF_data, columns = PFAS_ID, ref_table = PFAS_data)
data_dm_all_keys
# Create a palette of 70 colors using a range from a selection of base colors
color_palette <- colorRampPalette(c("red", "blue", "green", "purple", "orange", "pink", "yellow"))(70)
# Data model visualization:
data_dm_all_keys %>%
dm_draw()
# Data model integrity check:
data_dm_all_keys %>%
dm_examine_constraints()
## ℹ All constraints satisfied.
# The following chunk creates a new table (i.e., 'TMF_data2') that merges 'study_data', 'PFAS_data', 'fw_data', and 'TMF_data' tables.
TMF_data2 <-
data_dm_all_keys %>%
dm_select_tbl(TMF_data, fw_data, PFAS_data, study_data) %>%
dm_flatten_to_tbl(.start = TMF_data, .recursive = TRUE, .join = left_join)
colnames(TMF_data2)
# Save the table
write.csv(TMF_data2, here("data/TMF_data2.csv"), row.names = FALSE)
The following code creates a variance-covariance matrix (VCV) using the vcalc function:
VCV <- vcalc(vi = TMF_data2$ln_TMF_se^2, #Specifies the variance of the effect sizes
cluster = TMF_data2$FW_ID, #Indicates the clustering variable
obs = TMF_data2$TMF_ID, #Identifies each unique observation by its TMF_ID, allowing the function to calculate covariances between effect sizes that share a cluster.
rho = 0.5) #Sets the assumed correlation between effect sizes within the same cluster. This correlation reflects how much the effect sizes are expected to covary within the same food web.
The following code modifies the OrchaRd plot function to display model outputs on the natural log scale (ln_TMF) while presenting the plots on the original scale (TMF), enhancing the visual interpretation of the results.
orchard_plot_new <- function (object, mod = "1", group, xlab, N = NULL, alpha = 0.5,
angle = 90, cb = TRUE, k = TRUE, g = TRUE, tree.order = NULL,
trunk.size = 1, branch.size = 1.2, twig.size = 0.5, transfm = c("none",
"tanh", "invlogit", "percent", "percentr"), condition.lab = "Condition",
legend.pos = c("bottom.right", "bottom.left", "top.right",
"top.left", "top.out", "bottom.out", "none"), k.pos = c("right",
"left", "none"), colour = FALSE, fill = TRUE, weights = "prop",
by = NULL, at = NULL, upper = TRUE, flip = TRUE)
{
transfm <- match.arg(NULL, choices = transfm)
legend.pos <- match.arg(NULL, choices = legend.pos)
k.pos <- match.arg(NULL, choices = k.pos)
if (any(class(object) %in% c("robust.rma", "rma.mv", "rma",
"rma.uni"))) {
if (mod != "1") {
results <- orchaRd::mod_results(object, mod, group,
N, by = by, at = at, weights = weights, upper = upper)
}
else {
results <- orchaRd::mod_results(object, mod = "1",
group, N, by = by, at = at, weights = weights,
upper = upper)
}
}
if (any(class(object) %in% c("orchard"))) {
results <- object
}
mod_table <- results$mod_table
data_trim <- results$data
data_trim$moderator <- factor(data_trim$moderator, levels = mod_table$name,
labels = mod_table$name)
data_trim$scale <- (1/sqrt(data_trim[, "vi"]))
legend <- "Precision (1/SE)"
if (!is.null(tree.order) & length(tree.order) != nlevels(data_trim[,
"moderator"])) {
stop("Length of 'tree.order' does not equal number of categories in moderator")
}
if (!is.null(tree.order)) {
data_trim$moderator <- factor(data_trim$moderator, levels = tree.order,
labels = tree.order)
mod_table <- mod_table %>% dplyr::arrange(factor(name,
levels = tree.order))
}
if (is.null(N) == FALSE) {
data_trim$scale <- data_trim$N
legend <- paste0("Sample Size ($\\textit{N}$)")
}
if (transfm == "tanh") {
cols <- sapply(mod_table, is.numeric)
mod_table[, cols] <- Zr_to_r(mod_table[, cols])
data_trim$yi <- Zr_to_r(data_trim$yi)
label <- xlab
}
if (transfm == "invlogit") {
cols <- sapply(mod_table, is.numeric)
mod_table[, cols] <- lapply(mod_table[, cols], function(x) metafor::transf.ilogit(x))
data_trim$yi <- metafor::transf.ilogit(data_trim$yi)
label <- xlab
}
if (transfm == "percentr") {
cols <- sapply(mod_table, is.numeric)
mod_table[, cols] <- lapply(mod_table[, cols], function(x) (exp(x) -
1) * 100)
data_trim$yi <- (exp(data_trim$yi) - 1) * 100
label <- xlab
}
if (transfm == "percent") {
cols <- sapply(mod_table, is.numeric)
mod_table[, cols] <- lapply(mod_table[, cols], function(x) exp(x))
data_trim$yi <- (exp(data_trim$yi))
label <- xlab
}
else {
label <- xlab
}
mod_table$K <- as.vector(by(data_trim, data_trim[, "moderator"],
function(x) length(x[, "yi"])))
mod_table$g <- as.vector(num_studies(data_trim, moderator,
stdy)[, 2])
group_no <- length(unique(mod_table[, "name"]))
cbpl <- c("#888888", "#999933", "#D55E00", "#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288",
"#AA4499", "#44AA99", "#882255", "#661100",
"#6699CC", "#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#CC79A7", "#999999")
if (colour == TRUE) {
color <- as.factor(data_trim$stdy)
color2 <- NULL
}
else {
color <- data_trim$mod
color2 <- mod_table$name
}
if (fill == TRUE) {
fill <- color
}
else {
fill <- NULL
}
if (names(mod_table)[2] == "condition") {
condition_no <- length(unique(mod_table[, "condition"]))
plot <- ggplot2::ggplot() + ggbeeswarm::geom_quasirandom(data = data_trim,
ggplot2::aes(y = yi, x = moderator, size = scale,
colour = color, fill = fill), alpha = alpha,
shape = 21) + ggplot2::geom_hline(yintercept = 1,
linetype = 2, colour = "red", alpha = alpha) +
ggplot2::geom_linerange(data = mod_table, ggplot2::aes(x = name,
ymin = lowerCL, ymax = upperCL), size = branch.size,
position = ggplot2::position_dodge2(width = 0.3)) +
ggplot2::geom_pointrange(data = mod_table, ggplot2::aes(y = estimate,
x = name, ymin = lowerPR, ymax = upperPR, shape = as.factor(condition),
fill = color2), size = trunk.size, position = ggplot2::position_dodge2(width = 0.3),
linewidth = twig.size) + ggplot2::scale_shape_manual(values = 20 +
(1:condition_no)) + ggplot2::theme_bw() + ggplot2::guides(fill = "none",
colour = "none") + ggplot2::theme(legend.position = c(0,
1), legend.justification = c(0, 1)) + ggplot2::theme(legend.title = ggplot2::element_text(size = 9)) +
ggplot2::theme(legend.direction = "horizontal") +
ggplot2::theme(legend.background = ggplot2::element_blank()) +
ggplot2::labs(y = label, x = "", size = latex2exp::TeX(legend)) +
ggplot2::labs(shape = condition.lab) + ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10,
colour = "black", hjust = 0.5, angle = angle))
if (flip) {
plot <- plot + ggplot2::coord_flip()
}
}
else {
plot <- ggplot2::ggplot() + ggbeeswarm::geom_quasirandom(data = data_trim,
ggplot2::aes(y = yi, x = moderator, size = scale,
colour = color, fill = fill), alpha = alpha,
shape = 21) + ggplot2::geom_hline(yintercept = 1,
linetype = 2, colour = "red", alpha = alpha) +
ggplot2::geom_linerange(data = mod_table, ggplot2::aes(x = name,
ymin = lowerCL, ymax = upperCL), size = branch.size) +
ggplot2::geom_pointrange(data = mod_table, ggplot2::aes(y = estimate,
x = name, ymin = lowerPR, ymax = upperPR, fill = color2),
size = trunk.size, linewidth = twig.size, shape = 21) +
ggplot2::theme_bw() + ggplot2::guides(fill = "none",
colour = "none") + ggplot2::theme(legend.title = ggplot2::element_text(size = 9)) +
ggplot2::theme(legend.direction = "horizontal") +
ggplot2::theme(legend.background = ggplot2::element_blank()) +
ggplot2::labs(y = label, x = "", size = latex2exp::TeX(legend)) +
ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10,
colour = "black", hjust = 0.5, angle = angle))
if (flip) {
plot <- plot + ggplot2::coord_flip()
}
}
if (legend.pos == "bottom.right") {
plot <- plot + ggplot2::theme(legend.position = c(1,
0), legend.justification = c(1, 0))
}
else if (legend.pos == "bottom.left") {
plot <- plot + ggplot2::theme(legend.position = c(0,
0), legend.justification = c(0, 0))
}
else if (legend.pos == "top.right") {
plot <- plot + ggplot2::theme(legend.position = c(1,
1), legend.justification = c(1, 1))
}
else if (legend.pos == "top.left") {
plot <- plot + ggplot2::theme(legend.position = c(0,
1), legend.justification = c(0, 1))
}
else if (legend.pos == "top.out") {
plot <- plot + ggplot2::theme(legend.position = "top")
}
else if (legend.pos == "bottom.out") {
plot <- plot + ggplot2::theme(legend.position = "bottom")
}
else if (legend.pos == "none") {
plot <- plot + ggplot2::theme(legend.position = "none")
}
if (cb == TRUE) {
plot <- plot + ggplot2::scale_fill_manual(values = cbpl) +
ggplot2::scale_colour_manual(values = cbpl)
}
if (k == TRUE && g == FALSE && k.pos == "right") {
plot <- plot + ggplot2::annotate("text", y = (max(data_trim$yi) +
(max(data_trim$yi) * 0.1)), x = (seq(1, group_no,
1) + 0.3), label = paste("italic(k)==", mod_table$K[1:group_no]),
parse = TRUE, hjust = "right", size = 3.5)
}
else if (k == TRUE && g == FALSE && k.pos == "left") {
plot <- plot + ggplot2::annotate("text", y = (min(data_trim$yi) +
(min(data_trim$yi) * 0.1)), x = (seq(1, group_no,
1) + 0.3), label = paste("italic(k)==", mod_table$K[1:group_no]),
parse = TRUE, hjust = "left", size = 3.5)
}
else if (k == TRUE && g == TRUE && k.pos == "right") {
plot <- plot + ggplot2::annotate("text", y = (max(data_trim$yi) +
(max(data_trim$yi) * 0.1)), x = (seq(1, group_no,
1) + 0.3), label = paste("italic(k)==", mod_table$K[1:group_no],
"~", "(", mod_table$g[1:group_no], ")"), parse = TRUE,
hjust = "right", size = 3.5)
}
else if (k == TRUE && g == TRUE && k.pos == "left") {
plot <- plot + ggplot2::annotate("text", y = (min(data_trim$yi) +
(min(data_trim$yi) * 0.1)), x = (seq(1, group_no,
1) + 0.3), label = paste("italic(k)==", mod_table$K[1:group_no],
"~", "(", mod_table$g[1:group_no], ")"), parse = TRUE,
hjust = "left", size = 3.5)
}
else if (k == TRUE && g == FALSE && k.pos %in% c("right",
"left", "none") == FALSE) {
plot <- plot + ggplot2::annotate("text", y = k.pos, x = (seq(1,
group_no, 1) + 0.3), label = paste("italic(k)==",
mod_table$K[1:group_no]), parse = TRUE, size = 3.5)
}
else if (k == TRUE && g == TRUE && k.pos %in% c("right",
"left", "none") == FALSE) {
plot <- plot + ggplot2::annotate("text", y = k.pos, x = (seq(1,
group_no, 1) + 0.3), label = paste("italic(k)==",
mod_table$K[1:group_no], "~", "(", mod_table$g[1:group_no],
")"), parse = TRUE, size = 3.5)
}
return(plot)
}
The following code defines a standardized function to fit multivariate meta-analytic models. We are going to use this function to run all single-moderator models below.
run_model <- function(data, formula){
data <- as.data.frame(data) # convert data set into a data frame to calculate VCV matrix
VCV <- impute_covariance_matrix(data$ln_TMF_se^2,
cluster = data$FW_ID,
r = 0.5) # create VCV matrix for the specified data
rma.mv(ln_TMF,
VCV,
mods = formula,
random = list(~1|Study_ID,
~1|FW_ID,
~1|PFAS_ID,
~1|TMF_ID
),
test = "t",
data = data,
sparse = TRUE)
}
Determine the random effect structure: The random effects model accounts for between-study heterogeneity, which reflects the variability in effect sizes across the studies being analyzed. In determining the random effect structure, we aim to identify the sources of this heterogeneity and quantify its extent within the data. This step is crucial for accurately modeling the variation and ensuring that the results are generalizable beyond the specific studies included in the analysis.
–> NOTE! Model outputs are on the natural log scale (ln TMF), whereas the plots on the original scale (TMF).
mod <- rma.mv(yi = ln_TMF, # Natural logarithm of the Trophic Magnification Factor
V = VCV, # Variance co-variance matrix
random = list(~1|Study_ID, # Between-study effect
~1|FW_ID, # Between food webs effect
~1|PFAS_ID, # Between type of PFAS effect
~1|TMF_ID # Within-study effect
),
test = "t",
sparse = T,
data = TMF_data2)
TMF_data2_PFTrDA <- filter(TMF_data2,PFAS_ID == "PFTrDA")
rma(yi = ln_TMF, vi = ln_TMF_se^2, data = TMF_data2_PFTrDA)
save(mod, file = here("Rdata", "mod.RData"))
##
## Multivariate Meta-Analysis Model (k = 986; method: REML)
##
## logLik Deviance AIC BIC AICc
## -1016.6399 2033.2798 2043.2798 2067.7430 2043.3411
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1734 0.4164 62 no Study_ID
## sigma^2.2 0.0779 0.2791 115 no FW_ID
## sigma^2.3 0.1893 0.4351 77 no PFAS_ID
## sigma^2.4 0.1665 0.4081 986 no TMF_ID
##
## Test for Heterogeneity:
## Q(df = 985) = 25385.3772, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## 0.6952 0.1034 6.7222 985 <.0001 0.4923 0.8981 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## I2_Total I2_Study_ID I2_FW_ID I2_PFAS_ID I2_TMF_ID
## 97.60287 27.88155 12.52071 30.42905 26.77157
The red dotted line represents TMF = 1, the threshold indicating trophic magnification when exceeded, and biodilution when below. Next, we’ll plot the same graph with a zoomed-in view to enhance the visual interpretation of the results. (Note: The maximum value on the x-axis is equal to 10).
Our model shows that the overall PFAS Trophic Magnification Factor (TMF) is 2 (95% CI: 1.63-2.45), which is statistically significantly greater than 1. This indicates that, on average, PFAS) biomagnify within food webs by a factor of 2. A TMF of 2 means that the concentration of PFAS doubles with each increase in trophic level.
VCV_chem <- vcalc(vi = ln_TMF_se^2,
cluster = FW_ID,
obs = TMF_ID,
subgroup = PFAS_ID,
rho = 0.5,
data = TMF_data3)
mod_chem <- rma.mv(ln_TMF ~ PFAS_ID - 1,
V = VCV_chem,
random = list(~ PFAS_ID | FW_ID),
struct = "DIAG",
test = "t",
sparse = T,
data = TMF_data3,
#verbose = TRUE,
control=list(iter.max=1000,
rel.tol=1e-8))
# Save the model
save(mod_chem, file = here("Rdata", "mod_chem.RData"))
# Save the table
write.csv(TMF_data3, here("data/TMF_data3.csv"), row.names = FALSE)
##
## Multivariate Meta-Analysis Model (k = 915; method: REML)
##
## logLik Deviance AIC BIC AICc
## -1855.7618 3711.5237 3859.5237 4213.0695 3873.3468
##
## Variance Components:
##
## outer factor: FW_ID (nlvls = 113)
## inner factor: PFAS_ID (nlvls = 37)
##
## estim sqrt k.lvl fixed level
## tau^2.1 0.0000 0.0000 2 no 10:2 FTSA
## tau^2.2 1.9337 1.3906 3 no 6:2 diPAP
## tau^2.3 0.9842 0.9921 4 no 6:2 FTSA
## tau^2.4 0.0000 0.0000 2 no 6:2 H-PFESA
## tau^2.5 0.1392 0.3732 6 no 8:2 Cl-PFESA
## tau^2.6 0.0000 0.0001 2 no 8:2 FTSA
## tau^2.7 0.0000 0.0000 3 no ADONA
## tau^2.8 0.0000 0.0000 2 no br-PFDoDA
## tau^2.9 0.3085 0.5554 12 no br-PFOS
## tau^2.10 0.0000 0.0000 29 no F53B
## tau^2.11 0.3980 0.6309 27 no FOSA
## tau^2.12 0.0000 0.0019 2 no FOSAA
## tau^2.13 0.6344 0.7965 2 no H-PFMO2OSA
## tau^2.14 0.8068 0.8982 2 no HFPO-TeA
## tau^2.15 0.0620 0.2490 2 no HFPO-TrA
## tau^2.16 0.2924 0.5407 20 no l-PFOS
## tau^2.17 0.0000 0.0030 3 no MeFOSAA
## tau^2.18 0.5642 0.7512 6 no NEtFOSAA
## tau^2.19 0.0000 0.0010 14 no PFBA
## tau^2.20 0.3702 0.6084 19 no PFBS
## tau^2.21 0.3221 0.5676 83 no PFDA
## tau^2.22 0.1409 0.3753 82 no PFDoDA
## tau^2.23 0.7195 0.8483 21 no PFDS
## tau^2.24 0.0000 0.0000 17 no PFHpA
## tau^2.25 0.6591 0.8119 8 no PFHpS
## tau^2.26 1.4889 1.2202 14 no PFHxA
## tau^2.27 0.0000 0.0000 8 no PFHxDA
## tau^2.28 0.3318 0.5760 42 no PFHxS
## tau^2.29 0.0030 0.0550 2 no PFMOAA
## tau^2.30 0.3027 0.5502 85 no PFNA
## tau^2.31 0.0000 0.0000 2 no PFO5DoA
## tau^2.32 0.5229 0.7231 70 no PFOA
## tau^2.33 0.1513 0.3890 108 no PFOS
## tau^2.34 0.0000 0.0000 10 no PFPeA
## tau^2.35 0.1953 0.4419 46 no PFTeDA
## tau^2.36 0.5574 0.7466 63 no PFTrDA
## tau^2.37 0.2907 0.5392 92 no PFUnDA
##
## Test for Residual Heterogeneity:
## QE(df = 878) = 12857.4272, p-val < .0001
##
## Test of Moderators (coefficients 1:37):
## F(df1 = 37, df2 = 878) = 30.6223, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## PFAS_ID10:2 FTSA 1.2462 0.2490 5.0046 878 <.0001 0.7574 1.7349
## PFAS_ID6:2 diPAP -1.8059 0.9304 -1.9410 878 0.0526 -3.6319 0.0202
## PFAS_ID6:2 FTSA -0.9780 0.6372 -1.5349 878 0.1252 -2.2286 0.2726
## PFAS_ID6:2 H-PFESA 0.9898 0.4307 2.2981 878 0.0218 0.1445 1.8350
## PFAS_ID8:2 Cl-PFESA 1.4206 0.2876 4.9389 878 <.0001 0.8560 1.9851
## PFAS_ID8:2 FTSA 0.8844 0.4137 2.1378 878 0.0328 0.0724 1.6964
## PFAS_IDADONA -0.7057 0.3912 -1.8039 878 0.0716 -1.4736 0.0621
## PFAS_IDbr-PFDoDA 1.0641 0.2368 4.4938 878 <.0001 0.5994 1.5289
## PFAS_IDbr-PFOS 0.9115 0.1780 5.1203 878 <.0001 0.5621 1.2609
## PFAS_IDF53B 1.0327 0.1156 8.9366 878 <.0001 0.8059 1.2595
## PFAS_IDFOSA 0.6252 0.1688 3.7048 878 0.0002 0.2940 0.9565
## PFAS_IDFOSAA -0.5627 1.3995 -0.4021 878 0.6877 -3.3094 2.1841
## PFAS_IDH-PFMO2OSA 1.7034 0.6282 2.7118 878 0.0068 0.4706 2.9363
## PFAS_IDHFPO-TeA 1.8564 0.6858 2.7071 878 0.0069 0.5105 3.2024
## PFAS_IDHFPO-TrA 1.3689 0.3644 3.7562 878 0.0002 0.6536 2.0842
## PFAS_IDl-PFOS 0.8779 0.1444 6.0788 878 <.0001 0.5945 1.1614
## PFAS_IDMeFOSAA -0.2257 0.3430 -0.6579 878 0.5108 -0.8989 0.4476
## PFAS_IDNEtFOSAA -0.4828 0.3718 -1.2988 878 0.1944 -1.2125 0.2468
## PFAS_IDPFBA 0.2795 0.2562 1.0910 878 0.2756 -0.2233 0.7822
## PFAS_IDPFBS -0.0909 0.2922 -0.3110 878 0.7559 -0.6644 0.4826
## PFAS_IDPFDA 1.0391 0.0921 11.2851 878 <.0001 0.8584 1.2198
## PFAS_IDPFDoDA 0.7149 0.0739 9.6799 878 <.0001 0.5699 0.8598
## PFAS_IDPFDS 0.4972 0.2583 1.9246 878 0.0546 -0.0098 1.0042
## PFAS_IDPFHpA -0.0783 0.1677 -0.4669 878 0.6407 -0.4075 0.2509
## PFAS_IDPFHpS 0.7256 0.3238 2.2404 878 0.0253 0.0899 1.3612
## PFAS_IDPFHxA -0.3424 0.4406 -0.7771 878 0.4373 -1.2071 0.5223
## PFAS_IDPFHxDA 0.1497 0.0837 1.7885 878 0.0740 -0.0146 0.3140
## PFAS_IDPFHxS 0.5701 0.1428 3.9913 878 <.0001 0.2898 0.8504
## PFAS_IDPFMOAA -1.3933 1.1181 -1.2461 878 0.2131 -3.5878 0.8012
## PFAS_IDPFNA 0.8047 0.0944 8.5272 878 <.0001 0.6195 0.9899
## PFAS_IDPFO5DoA 1.7248 0.1695 10.1756 878 <.0001 1.3921 2.0575
## PFAS_IDPFOA 0.2674 0.1438 1.8597 878 0.0633 -0.0148 0.5497
## PFAS_IDPFOS 1.1387 0.0674 16.8984 878 <.0001 1.0064 1.2710
## PFAS_IDPFPeA -0.0404 0.3065 -0.1319 878 0.8951 -0.6420 0.5611
## PFAS_IDPFTeDA 0.3494 0.1075 3.2512 878 0.0012 0.1385 0.5604
## PFAS_IDPFTrDA 0.7051 0.1356 5.1993 878 <.0001 0.4390 0.9713
## PFAS_IDPFUnDA 0.8821 0.0878 10.0436 878 <.0001 0.7097 1.0544
##
## PFAS_ID10:2 FTSA ***
## PFAS_ID6:2 diPAP .
## PFAS_ID6:2 FTSA
## PFAS_ID6:2 H-PFESA *
## PFAS_ID8:2 Cl-PFESA ***
## PFAS_ID8:2 FTSA *
## PFAS_IDADONA .
## PFAS_IDbr-PFDoDA ***
## PFAS_IDbr-PFOS ***
## PFAS_IDF53B ***
## PFAS_IDFOSA ***
## PFAS_IDFOSAA
## PFAS_IDH-PFMO2OSA **
## PFAS_IDHFPO-TeA **
## PFAS_IDHFPO-TrA ***
## PFAS_IDl-PFOS ***
## PFAS_IDMeFOSAA
## PFAS_IDNEtFOSAA
## PFAS_IDPFBA
## PFAS_IDPFBS
## PFAS_IDPFDA ***
## PFAS_IDPFDoDA ***
## PFAS_IDPFDS .
## PFAS_IDPFHpA
## PFAS_IDPFHpS *
## PFAS_IDPFHxA
## PFAS_IDPFHxDA .
## PFAS_IDPFHxS ***
## PFAS_IDPFMOAA
## PFAS_IDPFNA ***
## PFAS_IDPFO5DoA ***
## PFAS_IDPFOA .
## PFAS_IDPFOS ***
## PFAS_IDPFPeA
## PFAS_IDPFTeDA **
## PFAS_IDPFTrDA ***
## PFAS_IDPFUnDA ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In our meta-analysis, we have 4 categories of predictors:
Whole-organism concentrations are usually measured at the base of food webs (plankton, lichens). For animals of higher trophic levels (seals, bears etc.) practical and ethical reasons mean sampling is done on specific organs or fluids instead. The sampling strategy also pertains to whether the data was initially gathered due to a study’s focus on ecological risk (whole animals) or human health risk (emphasizing edible parts such as fillets, eggs, muscle tissue, etc.).
We predict that PFAS biomagnification estimates based exclusively on whole-organism samples may differ from those on a mix of whole-organism and organ-specific.
sum(is.na(TMF_data2$Sample_type)) #checking the amount of NAs
#[1] 0
sample_model <- run_model(TMF_data2, ~ Sample_type)
save(sample_model, file = here("Rdata", "sample_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 986; method: REML)
##
## logLik Deviance AIC BIC AICc
## -1006.4010 2012.8019 2026.8019 2061.0362 2026.9168
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1814 0.4259 62 no Study_ID
## sigma^2.2 0.0760 0.2756 115 no FW_ID
## sigma^2.3 0.1902 0.4361 77 no PFAS_ID
## sigma^2.4 0.1616 0.4020 986 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 983) = 24167.4613, p-val < .0001
##
## Test of Moderators (coefficients 2:3):
## F(df1 = 2, df2 = 983) = 7.3226, p-val = 0.0007
##
## Model Results:
##
## estimate se tval df
## intrcpt 0.4860 0.1278 3.8033 983
## Sample_typespecific tissues 0.1928 0.1744 1.1056 983
## Sample_typespecific tissues and whole-organisms 0.4025 0.1056 3.8102 983
## pval ci.lb ci.ub
## intrcpt 0.0002 0.2352 0.7368 ***
## Sample_typespecific tissues 0.2692 -0.1494 0.5351
## Sample_typespecific tissues and whole-organisms 0.0001 0.1952 0.6098 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note: Maximum TMF value set to 10
Note: The maximum TMF value in the figure is capped at 10 to enhance the visual clarity of the results.
TMFs estimated using both tissue-specific samples (e.g., liver, blood) and whole-organism samples are significantly higher by 49% (p = 0.0001; 95% CI: 21-84%) compared to those estimated using only whole-organism samples.
Evaluation of biomagnification using TMFs based on protein- or lipid-normalized concentrations in food web organisms has been rarely suggested. However, it should be considered a source of variability in biomagnification estimates. We predict that the biomagnification estimates of TMFs based on protein- or lipid-normalized PFAS concentrations may differ from those based on concentrations non-normalised.
norm_model <- run_model(TMF_data2_norm, ~ Conc_determ_method - 1)
save(norm_model, file = here("Rdata", "norm_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 945; method: REML)
##
## logLik Deviance AIC BIC AICc
## -977.8596 1955.7192 1971.7192 2010.4948 1971.8737
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1745 0.4177 60 no Study_ID
## sigma^2.2 0.0671 0.2591 113 no FW_ID
## sigma^2.3 0.1798 0.4240 77 no PFAS_ID
## sigma^2.4 0.1653 0.4066 945 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 941) = 24554.3672, p-val < .0001
##
## Test of Moderators (coefficients 1:4):
## F(df1 = 4, df2 = 941) = 18.8295, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## Conc_determ_methoddw 0.4633 0.1439 3.2194 941 0.0013 0.1809 0.7457
## Conc_determ_methodlw 0.1659 0.1492 1.1122 941 0.2664 -0.1269 0.4588
## Conc_determ_methodpw 0.3846 0.1447 2.6577 941 0.0080 0.1006 0.6686
## Conc_determ_methodww 0.7901 0.1050 7.5226 941 <.0001 0.5839 0.9962
##
## Conc_determ_methoddw **
## Conc_determ_methodlw
## Conc_determ_methodpw **
## Conc_determ_methodww ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
Note: Maximum TMF value set to 12
Normalized concentrations have the lowest estimate (lw = 0.1659 and pw = 0.3846), compared to not normalized concentrations (dw = 0.4633 and ww = 0.7901). Let’s merge these four categories into two categories (normalized and not normalized concentrations).
TMF_data2_norm <- TMF_data2_norm %>%
mutate(Normalization = recode(
Conc_determ_method,
"lw" = "Normalized",
"ww" = "Not normalized",
"pw" = "Normalized",
"dw" = "Not normalized"
))
norm_model2 <- run_model(TMF_data2_norm, ~ Normalization)
save(norm_model2, file = here("Rdata", "norm_model2.RData"))
##
## Multivariate Meta-Analysis Model (k = 945; method: REML)
##
## logLik Deviance AIC BIC AICc
## -984.2558 1968.5116 1980.5116 2009.6060 1980.6013
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1745 0.4178 60 no Study_ID
## sigma^2.2 0.0698 0.2642 113 no FW_ID
## sigma^2.3 0.1783 0.4222 77 no PFAS_ID
## sigma^2.4 0.1679 0.4097 945 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 943) = 24804.7664, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 943) = 17.5308, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval ci.lb
## intrcpt 0.3708 0.1304 2.8445 943 0.0045 0.1150
## NormalizationNot normalized 0.3626 0.0866 4.1870 943 <.0001 0.1926
## ci.ub
## intrcpt 0.6267 **
## NormalizationNot normalized 0.5325 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
Note: Maximum TMF value set to 10
TMFs estimated using non-normalized PFAS concentrations (i.e., not adjusted for protein or lipid content) are significantly higher by 43% (p < .0001; 95% CI: 21-70%) compared to those estimated using normalized PFS concentrations.
We do not have much data on this predictor. Also, we found a result that is opposite to our prediction.
Using one-half of the limit value as a substitution for undetected compounds may inflate baseline compound concentrations. We predict that a substitution of undetected compound values by one-half of the limit value decreases the TMF compared with different approaches.
lod_model <- run_model(TMF_data2_lod, ~ Undetected_strategy_LOD - 1)
save(lod_model, file = here("Rdata", "lod_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 291; method: REML)
##
## logLik Deviance AIC BIC AICc
## -339.9304 679.8609 693.8609 719.5016 694.2609
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1726 0.4155 20 no Study_ID
## sigma^2.2 0.0203 0.1424 36 no FW_ID
## sigma^2.3 0.2241 0.4734 43 no PFAS_ID
## sigma^2.4 0.2207 0.4698 291 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 288) = 5418.8686, p-val < .0001
##
## Test of Moderators (coefficients 1:3):
## F(df1 = 3, df2 = 288) = 7.1684, p-val = 0.0001
##
## Model Results:
##
## estimate
## Undetected_strategy_LODthe limit value divided by the square root of two -0.3617
## Undetected_strategy_LODthe limit value divided by two 0.7401
## Undetected_strategy_LODzero 0.3838
## se
## Undetected_strategy_LODthe limit value divided by the square root of two 0.4167
## Undetected_strategy_LODthe limit value divided by two 0.1679
## Undetected_strategy_LODzero 0.2822
## tval
## Undetected_strategy_LODthe limit value divided by the square root of two -0.8680
## Undetected_strategy_LODthe limit value divided by two 4.4065
## Undetected_strategy_LODzero 1.3598
## df
## Undetected_strategy_LODthe limit value divided by the square root of two 288
## Undetected_strategy_LODthe limit value divided by two 288
## Undetected_strategy_LODzero 288
## pval
## Undetected_strategy_LODthe limit value divided by the square root of two 0.3861
## Undetected_strategy_LODthe limit value divided by two <.0001
## Undetected_strategy_LODzero 0.1750
## ci.lb
## Undetected_strategy_LODthe limit value divided by the square root of two -1.1819
## Undetected_strategy_LODthe limit value divided by two 0.4095
## Undetected_strategy_LODzero -0.1717
## ci.ub
## Undetected_strategy_LODthe limit value divided by the square root of two 0.4585
## Undetected_strategy_LODthe limit value divided by two 1.0706
## Undetected_strategy_LODzero 0.9393
##
## Undetected_strategy_LODthe limit value divided by the square root of two
## Undetected_strategy_LODthe limit value divided by two ***
## Undetected_strategy_LODzero
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
Note: Maximum TMF value set to 10
Different studies use different N-isotope TEF to calculate the trophic level of species. The choice of TEF can affect the resulting TMF. We predict that different TEF choices can result in under- or overestimation of PFAS biomagnification.
tef_model <- run_model(TMF_data2_tef, ~ TEF - 1)
save(tef_model, file = here("Rdata", "tef_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 733; method: REML)
##
## logLik Deviance AIC BIC AICc
## -790.8891 1581.7782 1597.7782 1634.5116 1597.9782
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1993 0.4464 44 no Study_ID
## sigma^2.2 0.0992 0.3150 78 no FW_ID
## sigma^2.3 0.1316 0.3627 66 no PFAS_ID
## sigma^2.4 0.1929 0.4392 733 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 729) = 19872.9907, p-val < .0001
##
## Test of Moderators (coefficients 1:4):
## F(df1 = 4, df2 = 729) = 11.5798, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## TEF2.3 0.3873 0.3402 1.1387 729 0.2552 -0.2805 1.0551
## TEF3.3 0.4088 0.5877 0.6956 729 0.4869 -0.7451 1.5627
## TEF3.4 0.8904 0.2147 4.1477 729 <.0001 0.4689 1.3118 ***
## TEF3.8 0.7899 0.1342 5.8868 729 <.0001 0.5265 1.0534 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
Note: Maximum TMF value set to 12
We did not find significant differences between groups. According to the single-moderator regression, the choice of TEF does not significantly impact the differences between TMFs of different studies.
rv_model <- run_model(TMF_data2_rv, ~ Regression_variable)
save(rv_model, file = here("Rdata", "rv_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 976; method: REML)
##
## logLik Deviance AIC BIC AICc
## -991.9299 1983.8599 1995.8599 2025.1483 1995.9467
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1516 0.3893 61 no Study_ID
## sigma^2.2 0.0818 0.2861 113 no FW_ID
## sigma^2.3 0.1896 0.4354 76 no PFAS_ID
## sigma^2.4 0.1658 0.4072 976 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 974) = 25342.8813, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 974) = 6.2950, p-val = 0.0123
##
## Model Results:
##
## estimate se tval df pval ci.lb
## intrcpt 0.7443 0.1046 7.1131 974 <.0001 0.5390
## Regression_variableδ15N -0.7402 0.2950 -2.5090 974 0.0123 -1.3191
## ci.ub
## intrcpt 0.9497 ***
## Regression_variableδ15N -0.1612 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
Note: Maximum TMF value set to 10
TMFs estimated by regressing stable nitrogen isotopes (δ¹⁵N) against PFAS concentrations are significantly lower by 47% (p = 0.123; 95% CI: 26-85%) compared to those obtained by regressing trophic levels against PFAS concentrations.
Tropical food webs are more intricate due to higher biodiversity, enabling more diverse consumer diets (Paine, 1966). Greater biomass and tissue turnover may dilute pollutants across networks. The latitude may incorporate the synergic effect of several factors, such as food web length and nature of top predator and food web baseline organism. We predict that PFAS biomagnification estimates on food webs closer to the equator may be lower than those at higher latitudes.
sum(is.na(TMF_data2$Latitude_abs)) #checking the amount of NAs
#[1] 0
lat_model <- run_model(TMF_data2, ~ scale(Latitude_abs))
save(lat_model, file = here("Rdata", "lat_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 976; method: REML)
##
## logLik Deviance AIC BIC AICc
## -995.0143 1990.0287 2002.0287 2031.3172 2002.1156
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1795 0.4237 61 no Study_ID
## sigma^2.2 0.0797 0.2823 113 no FW_ID
## sigma^2.3 0.1882 0.4338 76 no PFAS_ID
## sigma^2.4 0.1659 0.4073 976 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 974) = 22852.5048, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 974) = 0.0022, p-val = 0.9623
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 0.6845 0.1046 6.5433 974 <.0001 0.4792 0.8897
## scale(Latitude_abs) -0.0036 0.0754 -0.0473 974 0.9623 -0.1515 0.1444
##
## intrcpt ***
## scale(Latitude_abs)
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TMFs do not show a direct relationship with latitude.
location_model <- run_model(TMF_data2_loc, ~ Location - 1)
save(location_model, file = here("Rdata", "location_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 944; method: REML)
##
## logLik Deviance AIC BIC AICc
## -938.1750 1876.3501 1894.3501 1937.9534 1894.5438
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.2031 0.4507 60 no Study_ID
## sigma^2.2 0.0378 0.1944 111 no FW_ID
## sigma^2.3 0.2283 0.4778 73 no PFAS_ID
## sigma^2.4 0.0946 0.3076 944 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 939) = 2194.3659, p-val < .0001
##
## Test of Moderators (coefficients 1:5):
## F(df1 = 5, df2 = 939) = 7.2630, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval ci.lb
## LocationAntarctic Region 0.6373 0.3958 1.6101 939 0.1077 -0.1395
## LocationArctic Region 1.1081 0.4458 2.4853 939 0.0131 0.2331
## LocationAsia 0.7401 0.1569 4.7182 939 <.0001 0.4323
## LocationEurope 0.4716 0.1989 2.3718 939 0.0179 0.0814
## LocationNorth America 0.6727 0.1794 3.7503 939 0.0002 0.3207
## ci.ub
## LocationAntarctic Region 1.4142
## LocationArctic Region 1.9830 *
## LocationAsia 1.0480 ***
## LocationEurope 0.8619 *
## LocationNorth America 1.0247 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
Note: Maximum TMF value set to 12
TMFs estimated in food webs from different geographical regions are not statistically different.
Biomagnification of PFAS in food webs dominated by poikilotherms (e.g., aquatic ecosystem) is lower than in food webs dominated by air breathers (e.g., terrestrial ecosystems) because of differences in metabolic rates and lipid content between these two types of organisms. We predict that PFAS biomagnification estimates tend to be higher in food webs that include solely air-breathing organisms or a combination of air-breathing and water-breathing organisms, compared to those consisting exclusively of water-breathing organisms.
breath_model <- run_model(TMF_data2_bt, ~ Breathing_type)
save(breath_model, file = here("Rdata", "breath_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 858; method: REML)
##
## logLik Deviance AIC BIC AICc
## -863.9409 1727.8817 1739.8817 1768.3953 1739.9807
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.2997 0.5475 53 no Study_ID
## sigma^2.2 0.0000 0.0001 97 no FW_ID
## sigma^2.3 0.2252 0.4746 75 no PFAS_ID
## sigma^2.4 0.0918 0.3031 858 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 856) = 2082.9747, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 856) = 6.5976, p-val = 0.0104
##
## Model Results:
##
## estimate se tval df pval ci.lb
## intrcpt 1.0230 0.1837 5.5674 856 <.0001 0.6623
## Breathing_typeWater breathing -0.5051 0.1966 -2.5686 856 0.0104 -0.8910
## ci.ub
## intrcpt 1.3836 ***
## Breathing_typeWater breathing -0.1191 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
Note: Maximum TMF value set to 12
TMFs estimated in food webs with a water-breathing top predator are significantly lower by 60% (p = 0.0104; 95% CI: 41-89%) compared to those in food webs with an air-breathing top predator.
sum(is.na(TMF_data2$Ecosystem)) #checking the amount of NAs
#[1] 0
eco_model <- run_model(TMF_data2, ~ Ecosystem)
save(eco_model, file = here("Rdata", "eco_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 986; method: REML)
##
## logLik Deviance AIC BIC AICc
## -1009.3323 2018.6646 2034.6646 2073.7814 2034.8126
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1663 0.4078 62 no Study_ID
## sigma^2.2 0.0711 0.2667 115 no FW_ID
## sigma^2.3 0.1885 0.4341 77 no PFAS_ID
## sigma^2.4 0.1670 0.4087 986 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 982) = 21859.8611, p-val < .0001
##
## Test of Moderators (coefficients 2:4):
## F(df1 = 3, df2 = 982) = 1.2448, p-val = 0.2922
##
## Model Results:
##
## estimate se tval df pval
## intrcpt 0.8912 0.1505 5.9211 982 <.0001
## Ecosystemestuarine/tidal/brackish -0.3738 0.2194 -1.7041 982 0.0887
## Ecosystemfreshwater -0.2774 0.1811 -1.5312 982 0.1260
## Ecosystemterrestrial -0.1599 0.2792 -0.5728 982 0.5669
## ci.lb ci.ub
## intrcpt 0.5959 1.1866 ***
## Ecosystemestuarine/tidal/brackish -0.8043 0.0567 .
## Ecosystemfreshwater -0.6328 0.0781
## Ecosystemterrestrial -0.7077 0.3879
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note: Maximum TMF value set to 12
sum(is.na(TMF_data2$Breathing_type_fw))
# [1] 0
bt_fw_model <- run_model(TMF_data2, ~ Breathing_type_fw)
save(bt_fw_model, file = here("Rdata", "bt_fw_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 976; method: REML)
##
## logLik Deviance AIC BIC AICc
## -992.0446 1984.0891 1996.0891 2025.3776 1996.1760
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.2025 0.4500 61 no Study_ID
## sigma^2.2 0.0555 0.2356 113 no FW_ID
## sigma^2.3 0.1881 0.4337 76 no PFAS_ID
## sigma^2.4 0.1658 0.4072 976 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 974) = 25337.5960, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 974) = 6.2673, p-val = 0.0125
##
## Model Results:
##
## estimate se tval df pval
## intrcpt 0.7387 0.1072 6.8892 974 <.0001
## Breathing_type_fwwater-breathing only -0.6601 0.2637 -2.5035 974 0.0125
## ci.lb ci.ub
## intrcpt 0.5283 0.9491 ***
## Breathing_type_fwwater-breathing only -1.1775 -0.1427 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note: Maximum TMF value set to 12
TMFs estimated in food webs consisting exclusively of water-breathing organisms are significantly lower by 51% (p = 0.0125, 95% CI: 30-86%) compared to those in food webs that include both water- and air-breathing organisms.
Studies where a primary producer was used as the base of the food web are likely to have different biomagnification estimates compared to those with a primary consumer. We predict that an extension of the food web towards the lowest trophic level will likely affect the biomagnification estimates.
TL_lowest_model <- run_model(TMF_data2_TL_lowest, ~scale(TL_lowest))
save(TL_lowest_model, file = here("Rdata", "TL_lowest_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 832; method: REML)
##
## logLik Deviance AIC BIC AICc
## -900.0095 1800.0190 1812.0190 1840.3476 1812.1211
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1336 0.3655 52 no Study_ID
## sigma^2.2 0.0596 0.2442 95 no FW_ID
## sigma^2.3 0.2163 0.4650 70 no PFAS_ID
## sigma^2.4 0.0544 0.2332 832 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 830) = 1585.8852, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 830) = 7.7262, p-val = 0.0056
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 0.6083 0.1038 5.8630 830 <.0001 0.4047 0.8120 ***
## scale(TL_lowest) -0.1526 0.0549 -2.7796 830 0.0056 -0.2603 -0.0448 **
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model shows a significant negative relationship (0.86 factors for each trophic level; p = 0.0056) between TMF and the trophic level of the baseline organism in food webs.
Higher trophic-level organisms typically have higher lipid contents and longer lifespans. These allow more bioaccumulation over time, leading to higher concentrations in the top predators. We predict that an extension of the food web towards the highest trophic level will likely affect the biomagnification estimates.
TL_highest_model <- run_model(TMF_data2_TL_highest, ~scale(TL_highest))
save(TL_highest_model, file = here("Rdata", "TL_highest_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 832; method: REML)
##
## logLik Deviance AIC BIC AICc
## -844.6255 1689.2511 1701.2511 1729.5796 1701.3531
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1585 0.3982 52 no Study_ID
## sigma^2.2 0.0661 0.2572 95 no FW_ID
## sigma^2.3 0.2018 0.4492 70 no PFAS_ID
## sigma^2.4 0.1019 0.3192 832 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 830) = 2251.4851, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 830) = 0.3917, p-val = 0.5316
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 0.7623 0.1156 6.5916 830 <.0001 0.5353 0.9893 ***
## scale(TL_highest) -0.0425 0.0678 -0.6259 830 0.5316 -0.1756 0.0907
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model does not show a direct linear relationship.
Bioaccumulation occurs at each trophic transfer. With more trophic levels, contaminants accumulate to higher concentrations at the top predators. We predict that a broader range in trophic levels will likely increase the biomagnification estimate.
fw_length_model <- run_model(TMF_data2_fw_length, ~scale(fw_length))
save(fw_length_model, file = here("Rdata", "fw_length_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 832; method: REML)
##
## logLik Deviance AIC BIC AICc
## -844.8111 1689.6223 1701.6223 1729.9508 1701.7243
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1618 0.4022 52 no Study_ID
## sigma^2.2 0.0685 0.2617 95 no FW_ID
## sigma^2.3 0.2027 0.4503 70 no PFAS_ID
## sigma^2.4 0.1018 0.3190 832 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 830) = 2245.3820, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 830) = 0.0138, p-val = 0.9066
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 0.7604 0.1162 6.5454 830 <.0001 0.5324 0.9884 ***
## scale(fw_length) 0.0082 0.0697 0.1174 830 0.9066 -0.1286 0.1450
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model does not show a direct linear relationship.
sum(is.na(TMF_data2$n_species)) #checking the amount of NAs
#[1] 0
n_species_model <- run_model(TMF_data2, ~scale(n_species))
save(n_species_model, file = here("Rdata", "n_species_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 986; method: REML)
##
## logLik Deviance AIC BIC AICc
## -1014.8973 2029.7946 2041.7946 2071.1444 2041.8806
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1743 0.4175 62 no Study_ID
## sigma^2.2 0.0797 0.2823 115 no FW_ID
## sigma^2.3 0.1896 0.4354 77 no PFAS_ID
## sigma^2.4 0.1665 0.4080 986 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 984) = 24762.7428, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 984) = 0.0846, p-val = 0.7712
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 0.6941 0.1036 6.6968 984 <.0001 0.4907 0.8975 ***
## scale(n_species) -0.0212 0.0730 -0.2909 984 0.7712 -0.1645 0.1220
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model does not show a direct linear relationship.
Laboratory studies in which fish are exposed to contaminants solely through diet observed a direct positive relation between biomagnification factors and the number of carbon atoms. We predict that an increase in the chain length increases the biomagnification estimate.
ccl_model <- run_model(TMF_data2_ccl, ~scale(ccl))
save(ccl_model, file = here("Rdata", "ccl_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 953; method: REML)
##
## logLik Deviance AIC BIC AICc
## -965.9575 1931.9150 1943.9150 1973.0600 1944.0039
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.2005 0.4478 60 no Study_ID
## sigma^2.2 0.0553 0.2351 113 no FW_ID
## sigma^2.3 0.2294 0.4790 72 no PFAS_ID
## sigma^2.4 0.1080 0.3286 953 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 951) = 2554.0268, p-val < .0001
##
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 951) = 0.6648, p-val = 0.4151
##
## Model Results:
##
## estimate se tval df pval ci.lb ci.ub
## intrcpt 0.6641 0.1172 5.6674 951 <.0001 0.4342 0.8941 ***
## scale(ccl) 0.0580 0.0712 0.8153 951 0.4151 -0.0816 0.1977
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model does not show a direct linear relationship.
There is no clear mechanistic explanation for the greater bioaccumulation potential of acids containing a sulfonyl functional group. Nevertheless, Jones et al. (2003) showed that sulfonic acids bind strongly to proteins. We predict that perfluoroalkyl sulfonates (PFSA) are more bioaccumulative in the food web than perfluoroalkyl carboxylic acids (PFCAs) of the same fluorinated carbon chain length.
class_model <- run_model(TMF_data2_class, ~ Class - 1)
save(class_model, file = here("Rdata", "class_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 967; method: REML)
##
## logLik Deviance AIC BIC AICc
## -977.0261 1954.0522 1970.0522 2009.0127 1970.2032
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1988 0.4459 60 no Study_ID
## sigma^2.2 0.0539 0.2321 113 no FW_ID
## sigma^2.3 0.2224 0.4716 74 no PFAS_ID
## sigma^2.4 0.1074 0.3277 967 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 963) = 2507.7374, p-val < .0001
##
## Test of Moderators (coefficients 1:4):
## F(df1 = 4, df2 = 963) = 8.7733, p-val < .0001
##
## Model Results:
##
## estimate se tval df pval
## ClassEmerging PFAS 0.7186 0.2989 2.4041 963 0.0164
## ClassPFCA 0.6231 0.1535 4.0579 963 <.0001
## ClassPFSA 0.8013 0.1717 4.6664 963 <.0001
## ClassPrecursor and/or intermediate 0.5826 0.1528 3.8136 963 0.0001
## ci.lb ci.ub
## ClassEmerging PFAS 0.1320 1.3052 *
## ClassPFCA 0.3218 0.9244 ***
## ClassPFSA 0.4643 1.1383 ***
## ClassPrecursor and/or intermediate 0.2828 0.8823 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_segment()`).
The following alluvial plot provides a visual assessment of the potential correlations between categorical variables.
cor.test(TMF_data2$fw_length, TMF_data2$TL_highest)
##
## Pearson's product-moment correlation
##
## data: TMF_data2$fw_length and TMF_data2$TL_highest
## t = 19.7, df = 877, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5062819 0.5980845
## sample estimates:
## cor
## 0.5538643
cor.test(TMF_data2$fw_length, TMF_data2$n_species)
##
## Pearson's product-moment correlation
##
## data: TMF_data2$fw_length and TMF_data2$n_species
## t = 8.5158, df = 877, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2141473 0.3363366
## sample estimates:
## cor
## 0.2763584
Mixed-effects meta-regression model. Multivariate meta-analysis model with multiple covariates, using a multilevel random-effects approach.
full_model <- rma.mv(yi = ln_TMF,
V = VCV,
mods = ~ 1 +
Sample_type +
Conc_determ_method +
Undetected_strategy_LOD +
Regression_variable +
TEF +
scale(Latitude_abs) +
Breathing_type +
Ecosystem +
Breathing_type_fw +
TL_lowest +
scale(fw_length) +
scale(ccl) +
Class,
random = list(~1|Study_ID,
~1|FW_ID,
~1|PFAS_ID,
~1|TMF_ID
),
test = "t",
data = TMF_data2,
sparse = TRUE)
save(full_model, file = here("Rdata", "full_model.RData"))
##
## Multivariate Meta-Analysis Model (k = 440; method: REML)
##
## logLik Deviance AIC BIC AICc
## -386.3437 772.6874 848.6874 1000.9288 856.7637
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1386 0.3723 22 no Study_ID
## sigma^2.2 0.0000 0.0001 43 no FW_ID
## sigma^2.3 0.2544 0.5044 43 no PFAS_ID
## sigma^2.4 0.1536 0.3919 440 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 406) = 5840.6668, p-val < .0001
##
## Test of Moderators (coefficients 2:34):
## F(df1 = 33, df2 = 406) = 2.5553, p-val < .0001
##
## Model Results:
##
## estimate
## intrcpt 3.2011
## Sample_typespecific tissues -1.8169
## Sample_typespecific tissues and whole-organisms 0.5075
## Conc_determ_methodapparent chemical activities corrected concentrations 0.0899
## Conc_determ_methoddry weight concentrations (dw) 0.2722
## Conc_determ_methodlipid equivalent corrected concentrations (lw) 0.1078
## Conc_determ_methodpolar lipid equivalent corrected concentrations -0.4136
## Conc_determ_methodprotein equivalent corrected concentrations (pw) 0.2156
## Conc_determ_methodwet weight concentrations (ww) 0.5323
## Undetected_strategy_LODcensored regression functions 1.2095
## Undetected_strategy_LODexclusion of adata -1.5558
## Undetected_strategy_LODimputation -2.2647
## Undetected_strategy_LODMaximum likelihood estimation (MLE) -1.7849
## Undetected_strategy_LODRandom numbers below half of the limit value -0.2993
## Undetected_strategy_LODrandom numbers between 0 and the limit 1.0956
## Undetected_strategy_LODrandom numbers between 0 and the limit value -0.1041
## Undetected_strategy_LODRegression on Order Statistics (ROS) -1.2287
## Undetected_strategy_LODthe limit value divided by the square root of two -1.5442
## Undetected_strategy_LODthe limit value divided by two -0.7702
## Regression_variableδ15N 2.1634
## TEF2.4 -0.6205
## TEF3.4 -0.0190
## TEF3.8 -0.8098
## scale(Latitude_abs) 0.1254
## Breathing_typeWater breathing 0.6712
## Ecosystemestuarine/tidal/brackish 0.2800
## Ecosystemfreshwater -1.8385
## Ecosystemterrestrial -0.9817
## TL_lowest -0.5672
## scale(fw_length) -0.0138
## scale(ccl) 0.1232
## ClassPFCA -0.4078
## ClassPFSA -0.1957
## ClassPrecursor and/or intermediate -0.4623
## se
## intrcpt 1.7779
## Sample_typespecific tissues 0.7425
## Sample_typespecific tissues and whole-organisms 0.1232
## Conc_determ_methodapparent chemical activities corrected concentrations 0.1695
## Conc_determ_methoddry weight concentrations (dw) 0.1597
## Conc_determ_methodlipid equivalent corrected concentrations (lw) 0.1619
## Conc_determ_methodpolar lipid equivalent corrected concentrations 0.1738
## Conc_determ_methodprotein equivalent corrected concentrations (pw) 0.1602
## Conc_determ_methodwet weight concentrations (ww) 0.1640
## Undetected_strategy_LODcensored regression functions 1.3286
## Undetected_strategy_LODexclusion of adata 1.9391
## Undetected_strategy_LODimputation 1.7172
## Undetected_strategy_LODMaximum likelihood estimation (MLE) 3.5915
## Undetected_strategy_LODRandom numbers below half of the limit value 1.1704
## Undetected_strategy_LODrandom numbers between 0 and the limit 0.6400
## Undetected_strategy_LODrandom numbers between 0 and the limit value 0.7526
## Undetected_strategy_LODRegression on Order Statistics (ROS) 4.7521
## Undetected_strategy_LODthe limit value divided by the square root of two 0.8642
## Undetected_strategy_LODthe limit value divided by two 0.9553
## Regression_variableδ15N 1.2412
## TEF2.4 1.1555
## TEF3.4 0.6833
## TEF3.8 0.8691
## scale(Latitude_abs) 0.2750
## Breathing_typeWater breathing 0.9618
## Ecosystemestuarine/tidal/brackish 0.7638
## Ecosystemfreshwater 1.0342
## Ecosystemterrestrial 0.6138
## TL_lowest 0.3377
## scale(fw_length) 0.1302
## scale(ccl) 0.1029
## ClassPFCA 0.5404
## ClassPFSA 0.5484
## ClassPrecursor and/or intermediate 0.5188
## tval
## intrcpt 1.8005
## Sample_typespecific tissues -2.4470
## Sample_typespecific tissues and whole-organisms 4.1206
## Conc_determ_methodapparent chemical activities corrected concentrations 0.5301
## Conc_determ_methoddry weight concentrations (dw) 1.7047
## Conc_determ_methodlipid equivalent corrected concentrations (lw) 0.6660
## Conc_determ_methodpolar lipid equivalent corrected concentrations -2.3801
## Conc_determ_methodprotein equivalent corrected concentrations (pw) 1.3458
## Conc_determ_methodwet weight concentrations (ww) 3.2453
## Undetected_strategy_LODcensored regression functions 0.9104
## Undetected_strategy_LODexclusion of adata -0.8023
## Undetected_strategy_LODimputation -1.3188
## Undetected_strategy_LODMaximum likelihood estimation (MLE) -0.4970
## Undetected_strategy_LODRandom numbers below half of the limit value -0.2558
## Undetected_strategy_LODrandom numbers between 0 and the limit 1.7117
## Undetected_strategy_LODrandom numbers between 0 and the limit value -0.1383
## Undetected_strategy_LODRegression on Order Statistics (ROS) -0.2586
## Undetected_strategy_LODthe limit value divided by the square root of two -1.7868
## Undetected_strategy_LODthe limit value divided by two -0.8062
## Regression_variableδ15N 1.7430
## TEF2.4 -0.5370
## TEF3.4 -0.0278
## TEF3.8 -0.9318
## scale(Latitude_abs) 0.4558
## Breathing_typeWater breathing 0.6978
## Ecosystemestuarine/tidal/brackish 0.3666
## Ecosystemfreshwater -1.7778
## Ecosystemterrestrial -1.5994
## TL_lowest -1.6799
## scale(fw_length) -0.1058
## scale(ccl) 1.1979
## ClassPFCA -0.7547
## ClassPFSA -0.3569
## ClassPrecursor and/or intermediate -0.8911
## df
## intrcpt 406
## Sample_typespecific tissues 406
## Sample_typespecific tissues and whole-organisms 406
## Conc_determ_methodapparent chemical activities corrected concentrations 406
## Conc_determ_methoddry weight concentrations (dw) 406
## Conc_determ_methodlipid equivalent corrected concentrations (lw) 406
## Conc_determ_methodpolar lipid equivalent corrected concentrations 406
## Conc_determ_methodprotein equivalent corrected concentrations (pw) 406
## Conc_determ_methodwet weight concentrations (ww) 406
## Undetected_strategy_LODcensored regression functions 406
## Undetected_strategy_LODexclusion of adata 406
## Undetected_strategy_LODimputation 406
## Undetected_strategy_LODMaximum likelihood estimation (MLE) 406
## Undetected_strategy_LODRandom numbers below half of the limit value 406
## Undetected_strategy_LODrandom numbers between 0 and the limit 406
## Undetected_strategy_LODrandom numbers between 0 and the limit value 406
## Undetected_strategy_LODRegression on Order Statistics (ROS) 406
## Undetected_strategy_LODthe limit value divided by the square root of two 406
## Undetected_strategy_LODthe limit value divided by two 406
## Regression_variableδ15N 406
## TEF2.4 406
## TEF3.4 406
## TEF3.8 406
## scale(Latitude_abs) 406
## Breathing_typeWater breathing 406
## Ecosystemestuarine/tidal/brackish 406
## Ecosystemfreshwater 406
## Ecosystemterrestrial 406
## TL_lowest 406
## scale(fw_length) 406
## scale(ccl) 406
## ClassPFCA 406
## ClassPFSA 406
## ClassPrecursor and/or intermediate 406
## pval
## intrcpt 0.0725
## Sample_typespecific tissues 0.0148
## Sample_typespecific tissues and whole-organisms <.0001
## Conc_determ_methodapparent chemical activities corrected concentrations 0.5963
## Conc_determ_methoddry weight concentrations (dw) 0.0890
## Conc_determ_methodlipid equivalent corrected concentrations (lw) 0.5058
## Conc_determ_methodpolar lipid equivalent corrected concentrations 0.0178
## Conc_determ_methodprotein equivalent corrected concentrations (pw) 0.1791
## Conc_determ_methodwet weight concentrations (ww) 0.0013
## Undetected_strategy_LODcensored regression functions 0.3632
## Undetected_strategy_LODexclusion of adata 0.4228
## Undetected_strategy_LODimputation 0.1880
## Undetected_strategy_LODMaximum likelihood estimation (MLE) 0.6195
## Undetected_strategy_LODRandom numbers below half of the limit value 0.7983
## Undetected_strategy_LODrandom numbers between 0 and the limit 0.0877
## Undetected_strategy_LODrandom numbers between 0 and the limit value 0.8900
## Undetected_strategy_LODRegression on Order Statistics (ROS) 0.7961
## Undetected_strategy_LODthe limit value divided by the square root of two 0.0747
## Undetected_strategy_LODthe limit value divided by two 0.4206
## Regression_variableδ15N 0.0821
## TEF2.4 0.5916
## TEF3.4 0.9779
## TEF3.8 0.3520
## scale(Latitude_abs) 0.6488
## Breathing_typeWater breathing 0.4857
## Ecosystemestuarine/tidal/brackish 0.7141
## Ecosystemfreshwater 0.0762
## Ecosystemterrestrial 0.1105
## TL_lowest 0.0938
## scale(fw_length) 0.9158
## scale(ccl) 0.2317
## ClassPFCA 0.4509
## ClassPFSA 0.7213
## ClassPrecursor and/or intermediate 0.3734
## ci.lb
## intrcpt -0.2940
## Sample_typespecific tissues -3.2765
## Sample_typespecific tissues and whole-organisms 0.2654
## Conc_determ_methodapparent chemical activities corrected concentrations -0.2433
## Conc_determ_methoddry weight concentrations (dw) -0.0417
## Conc_determ_methodlipid equivalent corrected concentrations (lw) -0.2105
## Conc_determ_methodpolar lipid equivalent corrected concentrations -0.7551
## Conc_determ_methodprotein equivalent corrected concentrations (pw) -0.0993
## Conc_determ_methodwet weight concentrations (ww) 0.2099
## Undetected_strategy_LODcensored regression functions -1.4023
## Undetected_strategy_LODexclusion of adata -5.3677
## Undetected_strategy_LODimputation -5.6403
## Undetected_strategy_LODMaximum likelihood estimation (MLE) -8.8452
## Undetected_strategy_LODRandom numbers below half of the limit value -2.6001
## Undetected_strategy_LODrandom numbers between 0 and the limit -0.1627
## Undetected_strategy_LODrandom numbers between 0 and the limit value -1.5836
## Undetected_strategy_LODRegression on Order Statistics (ROS) -10.5705
## Undetected_strategy_LODthe limit value divided by the square root of two -3.2432
## Undetected_strategy_LODthe limit value divided by two -2.6482
## Regression_variableδ15N -0.2766
## TEF2.4 -2.8920
## TEF3.4 -1.3622
## TEF3.8 -2.5182
## scale(Latitude_abs) -0.4153
## Breathing_typeWater breathing -1.2196
## Ecosystemestuarine/tidal/brackish -1.2214
## Ecosystemfreshwater -3.8716
## Ecosystemterrestrial -2.1882
## TL_lowest -1.2310
## scale(fw_length) -0.2697
## scale(ccl) -0.0790
## ClassPFCA -1.4701
## ClassPFSA -1.2737
## ClassPrecursor and/or intermediate -1.4823
## ci.ub
## intrcpt 6.6962
## Sample_typespecific tissues -0.3573
## Sample_typespecific tissues and whole-organisms 0.7496
## Conc_determ_methodapparent chemical activities corrected concentrations 0.4231
## Conc_determ_methoddry weight concentrations (dw) 0.5860
## Conc_determ_methodlipid equivalent corrected concentrations (lw) 0.4261
## Conc_determ_methodpolar lipid equivalent corrected concentrations -0.0720
## Conc_determ_methodprotein equivalent corrected concentrations (pw) 0.5306
## Conc_determ_methodwet weight concentrations (ww) 0.8547
## Undetected_strategy_LODcensored regression functions 3.8214
## Undetected_strategy_LODexclusion of adata 2.2561
## Undetected_strategy_LODimputation 1.1110
## Undetected_strategy_LODMaximum likelihood estimation (MLE) 5.2755
## Undetected_strategy_LODRandom numbers below half of the limit value 2.0015
## Undetected_strategy_LODrandom numbers between 0 and the limit 2.3538
## Undetected_strategy_LODrandom numbers between 0 and the limit value 1.3754
## Undetected_strategy_LODRegression on Order Statistics (ROS) 8.1131
## Undetected_strategy_LODthe limit value divided by the square root of two 0.1547
## Undetected_strategy_LODthe limit value divided by two 1.1078
## Regression_variableδ15N 4.6033
## TEF2.4 1.6511
## TEF3.4 1.3242
## TEF3.8 0.8987
## scale(Latitude_abs) 0.6660
## Breathing_typeWater breathing 2.5620
## Ecosystemestuarine/tidal/brackish 1.7815
## Ecosystemfreshwater 0.1945
## Ecosystemterrestrial 0.2249
## TL_lowest 0.0966
## scale(fw_length) 0.2421
## scale(ccl) 0.3254
## ClassPFCA 0.6545
## ClassPFSA 0.8823
## ClassPrecursor and/or intermediate 0.5576
##
## intrcpt .
## Sample_typespecific tissues *
## Sample_typespecific tissues and whole-organisms ***
## Conc_determ_methodapparent chemical activities corrected concentrations
## Conc_determ_methoddry weight concentrations (dw) .
## Conc_determ_methodlipid equivalent corrected concentrations (lw)
## Conc_determ_methodpolar lipid equivalent corrected concentrations *
## Conc_determ_methodprotein equivalent corrected concentrations (pw)
## Conc_determ_methodwet weight concentrations (ww) **
## Undetected_strategy_LODcensored regression functions
## Undetected_strategy_LODexclusion of adata
## Undetected_strategy_LODimputation
## Undetected_strategy_LODMaximum likelihood estimation (MLE)
## Undetected_strategy_LODRandom numbers below half of the limit value
## Undetected_strategy_LODrandom numbers between 0 and the limit .
## Undetected_strategy_LODrandom numbers between 0 and the limit value
## Undetected_strategy_LODRegression on Order Statistics (ROS)
## Undetected_strategy_LODthe limit value divided by the square root of two .
## Undetected_strategy_LODthe limit value divided by two
## Regression_variableδ15N .
## TEF2.4
## TEF3.4
## TEF3.8
## scale(Latitude_abs)
## Breathing_typeWater breathing
## Ecosystemestuarine/tidal/brackish
## Ecosystemfreshwater .
## Ecosystemterrestrial
## TL_lowest .
## scale(fw_length)
## scale(ccl)
## ClassPFCA
## ClassPFSA
## ClassPrecursor and/or intermediate
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eval(metafor:::.MuMIn) # use eval() function to extract helper functions from MuMIn and make them usable in metafor.
mod.candidate <- dredge(full_model, beta = "none", evaluate = TRUE, rank = "AICc", trace=2) # dredge to produce all possible models
# Save the candidate best model
save(mod.candidate, file = here("Rdata", "best_model.RData"))
# Increase text size for x and y axes
par(cex.axis = 1.5)
# Increase label size for x and y axes
par(cex.lab = 1.5)
funnel(
full_model,
yaxis = "seinv", # Inverse of standard error (precision) as the y axis
level = c(90, 95), # levels of statistical significance highlighted
shade = c("white", "gray55"), # shades for different levels of statistical significance
back = "#EBEBEB",
legend = FALSE, # display legend
ylab = "Precision (1/SE)",
cex.lab = 1.5,
digits = 1,
xlim = c(-5,5)
)
There is no evidence of publication bias.
MLMR_mod_year.c <- metafor::rma.mv(ln_TMF,
VCV,
mods = ~ scale(year_publication),
random = list(~1|Study_ID,
~1|FW_ID,
~1|PFAS_ID,
~1|TMF_ID
),
data = TMF_data2)
save(MLMR_mod_year.c, file = here("Rdata", "MLMR_mod_year.c.RData"))
##
## Multivariate Meta-Analysis Model (k = 986; method: REML)
##
## logLik Deviance AIC BIC AICc
## -1014.9164 2029.8329 2041.8329 2071.1826 2041.9189
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2.1 0.1797 0.4239 62 no Study_ID
## sigma^2.2 0.0779 0.2790 115 no FW_ID
## sigma^2.3 0.1889 0.4347 77 no PFAS_ID
## sigma^2.4 0.1665 0.4080 986 no TMF_ID
##
## Test for Residual Heterogeneity:
## QE(df = 984) = 24926.4247, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.0001, p-val = 0.9908
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 0.6946 0.1045 6.6479 <.0001 0.4898 0.8994
## scale(year_publication) -0.0008 0.0736 -0.0115 0.9908 -0.1451 0.1434
##
## intrcpt ***
## scale(year_publication)
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There is no evidence of publication time bias.